library(survey)

# Import Data -------------------------------------------------------------
#setwd("")
source("01_cleaning_genderscienceIAT_2020.R")

# Create Targets ----------------------------------------------------------
WEEKS <- unique(impdat20_gendsci$week)
set.seed(1693)
weight_week <- sample(WEEKS[1:16], 1) # any week between january and march
weight_week 
weight_week_dat <- subset(impdat20_gendsci, week == weight_week  & 
                            !is.na(sex)  & !is.na(race5) & !is.na(edu_cat4) & !is.na(age_cat4) & 
                            !is.na(census_region) & !is.na(broughtwebsite2) & 
                            !is.na(ideo3))
base_week <- svydesign(~1, data = weight_week_dat)

# FUNCTION FOR CREATING TARGETS FROM AUXILIARY INFORMATION AND FORMULA
# From Caughey et al 2020
create_targets <- function (target_design, target_formula) {
  target_mf <- model.frame(target_formula, model.frame(target_design))
  target_mm <- model.matrix(target_formula, target_mf)
  wts <- weights(target_design)
  colSums(target_mm * wts) / sum(wts) # returns vector of targets
}

# Note Targets
target_formula <- ~ (race3 + ideo3_lab + sex)^2 + 
  (race3 + ideo3_lab + edu_cat4)^3 + 
  (race3 + ideo3_lab + age_cat4)^3 + 
  (race3 + ideo3_lab + census_region)^2 + 
  (race3 + ideo3_lab + broughtwebsite2)^2
pop_targets <- create_targets(base_week, target_formula)
pop_targets


# Creating Weights by Week ------------------------------------------------
impdat20.weights <- array(NA, c(1, 2))
colnames(impdat20.weights) <- c("session_id", "WeekWeights")

WEEKS <- sort(unique(impdat20_gendsci$week))

for(i in 1:length(WEEKS)){
  print(i/length(WEEKS))
  dat <- impdat20_gendsci[which(impdat20_gendsci$week == WEEKS[i]),]
  dat <- subset(dat, !is.na(sex) & !is.na(race5) & !is.na(edu_cat4) & !is.na(age_cat4) & !is.na(census_region)
                & !is.na(broughtwebsite2) & !is.na(ideo3))
  
  print(nrow(dat))
  
  impdat20.unw <- svydesign(ids = ~1, data = dat)
  x <- array(NA, c(nrow(dat), 2))
  x[,1] <- dat$session_id
  
  if(nrow(dat) > 500){
    
    d_weighted <- calibrate(design = impdat20.unw,
                            formula = target_formula,
                            population = pop_targets,
                            calfun = "raking",
                            maxit = 2000, 
                            epsilon = 0.398) 
    
    ## Verify targets (uncomment for weeks if wanted)
    ### means
    # unadjusted
    # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, impdat20.unw)
    # # target
    # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, base_week)
    # # rake
    # svymean(~sex + edu_cat4 + race3 + age_cat4 + census_region + broughtwebsite2 + ideo3_lab, d_weighted)
    # # unadjusted
    # svyby(~edu_cat4, ~ideo3_lab, impdat20.unw, svymean)
    # # target
    # svyby(~edu_cat4, ~ideo3_lab, base_week, svymean)
    # # rake
    # svyby(~edu_cat4, ~ideo3_lab, d_weighted, svymean)
    
    x[,2] <- weights(d_weighted)
  }
  
  if(nrow(dat) < 500){
    x[,2] <- NA
  }
  
  impdat20.weights <- rbind(impdat20.weights,x)
}

impdat20.GendSci.WeekWeight <- merge(impdat20_gendsci, impdat20.weights, by = "session_id", all.x = T)
rm(impdat20.weights, impdat20.unw)

# summary(impdat20.GendSci.WeekWeight$WeekWeights)
# hist(impdat20.GendSci.WeekWeight$WeekWeights)
# sd(impdat20.GendSci.WeekWeight$WeekWeights, na.rm = T)

save(impdat20.GendSci.WeekWeight, file = "./data/impdat20.GendSci.WeekWeight.rdata")


